### Preliminary ----------------------------------------------------------------
# Load required libraries
library(tidyverse)    # For data manipulation and visualization
library(stringr)      # For string operations (str_c)
library(fixest)       # For fixed effects regression models
library(did)          # For Callaway-Sant'Anna DiD estimator
library(modelsummary) # For creating regression tables

# Load legislator-level data
leg_df <- read_csv("State Legislator Panel.csv")

### Creating Figure 4 ----------------------------------------------------------
# Compare three difference-in-differences estimation approaches:
# 1. Two-way fixed effects (2WFE)
# 2. Stacked DiD
# 3. Callaway-Sant'Anna (CS) DiD estimator

# Prepare data: identify treatment timing and create stacked cohorts
model_data <- leg_df %>%
  group_by(klarner_id) %>%
  arrange(election_year) %>%
  mutate(
    # Record initial and final viability status for each legislator
    initial_viable = first(viable),
    final_viable = last(viable),
    # Create time periods (2-year bins)
    period = case_when(
      election_year %in% c(2010, 2011) ~ 1,
      election_year %in% c(2012, 2013) ~ 2,
      election_year %in% c(2014, 2015) ~ 3,
      election_year %in% c(2016, 2017) ~ 4,
      election_year %in% c(2018, 2019) ~ 5,
      election_year == 2020 ~ 6),
    # Identify when legislator switches to viable status
    switch = ifelse(viable == 1 & lag(viable) == 0, period, 0),
    switch = max(switch),
    # Create state-chamber-party identifier
    scp_id = str_c(state, chamber, party)) %>%
  # Focus on legislators who start as non-viable
  filter(initial_viable == 0)

# Build stacked dataset for stacked DiD approach
# Each treatment cohort is stacked with a clean control group
for(switch_period in 3:6){
  subset <- model_data %>%
    filter(switch == switch_period | switch == 0) %>%
    mutate(time_to_treat = period - switch_period,
           switch_period = switch_period,
           ever_treated = ifelse(switch == 0, 0, 1))

  if(switch_period == 3){
    stacked_df <- subset
  } else {
    stacked_df <- bind_rows(stacked_df, subset)
  }
}

# Model 1: Traditional two-way fixed effects (2WFE)
model_2wfe <- feols(avg_opp_prop ~ viable + party_pres_vs + seniority |
                      klarner_id + election_year,
                    data = model_data)

# Model 2: Stacked DiD with cohort-specific fixed effects
model_stacked <- feols(avg_opp_prop ~ viable + party_pres_vs + seniority |
                         klarner_id^switch_period + time_to_treat,
                       data = stacked_df)

# Model 3: Callaway-Sant'Anna DiD estimator
model_cs_gt <- att_gt(yname = "avg_opp_prop",
                      tname = "period",
                      idname = "klarner_id",
                      gname = "switch",
                      alp = 0.10,
                      allow_unbalanced_panel = TRUE,
                      data = model_data)

# Aggregate CS estimates to get simple overall ATT
model_cs <- aggte(model_cs_gt, type = "simple", na.rm = TRUE)

# Calculate p-value for CS estimator
t_stat <- (model_cs$overall.att / model_cs$overall.se)
p_value_cs <- 2 * pt(abs(t_stat), df = nrow(model_data), lower.tail = FALSE)
cat("CS DiD p-value:", p_value_cs, "\n")

# Prepare data for plotting
graphing_data_fig4 <- data.frame(
  method = c("2WFE", "Stacked DiD", "CS DiD Estimator"),
  estimate = c(coef(model_2wfe)[1], coef(model_stacked)[1], model_cs$overall.att),
  lower_90 = c(confint(model_2wfe, level = 0.90, parm = "viable")[[1]],
               confint(model_stacked, level = 0.90, parm = "viable")[[1]],
               model_cs$overall.att - 1.64 * model_cs$overall.se),
  lower_95 = c(confint(model_2wfe, level = 0.95, parm = "viable")[[1]],
               confint(model_stacked, level = 0.95, parm = "viable")[[1]],
               model_cs$overall.att - 1.96 * model_cs$overall.se),
  upper_90 = c(confint(model_2wfe, level = 0.90, parm = "viable")[[2]],
               confint(model_stacked, level = 0.90, parm = "viable")[[2]],
               model_cs$overall.att + 1.64 * model_cs$overall.se),
  upper_95 = c(confint(model_2wfe, level = 0.95, parm = "viable")[[2]],
               confint(model_stacked, level = 0.95, parm = "viable")[[2]],
               model_cs$overall.att + 1.96 * model_cs$overall.se)) %>%
  mutate(method = factor(method, levels = rev(c("2WFE", "Stacked DiD", "CS DiD Estimator"))))

# Generate Figure 4: Comparison plot across three DiD methods
ggplot(graphing_data_fig4, aes(x = estimate, y = method)) +
  geom_vline(aes(xintercept = 0), lty = "dashed") +
  geom_vline(aes(xintercept = -0.021), lty = "dashed", color = "gray") +
  geom_errorbarh(aes(xmin = lower_90, xmax = upper_90), linewidth = 1, height = 0) +
  geom_errorbarh(aes(xmin = lower_95, xmax = upper_95), linewidth = 1/2, height = 0) +
  geom_point(size = 3, shape = 21, color = "black", fill = "white") +
  scale_x_continuous(limits = c(-0.06, 0.06)) +
  labs(x = "Estimated Effect of Viability \n on Bipartisan Collaboration",
       y = NULL) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )


### Creating Figure J1 ---------------------------------------------------------
# Robustness check: Test sensitivity to different viability thresholds
# Viability defined as having X% of co-partisans in the district

# Threshold 1: Any co-partisan representation (baseline)
model_thresh_any <- feols(avg_opp_prop ~ viable + party_pres_vs + seniority |
                            klarner_id + election_year,
                          data = model_data)

# Threshold 2: At least 25% co-partisan representation
model_thresh_25 <- model_data %>%
  mutate(viable = case_when(
    party == "Rep" & prop_rep >= 0.25 ~ 1,
    party == "Dem" & prop_dem >= 0.25 ~ 1,
    TRUE ~ 0)) %>%
  feols(avg_opp_prop ~ viable + party_pres_vs + seniority |
          klarner_id + election_year,
        data = .)

# Threshold 3: At least 33% co-partisan representation
model_thresh_33 <- model_data %>%
  mutate(viable = case_when(
    party == "Rep" & prop_rep >= 0.33 ~ 1,
    party == "Dem" & prop_dem >= 0.33 ~ 1,
    TRUE ~ 0)) %>%
  feols(avg_opp_prop ~ viable + party_pres_vs + seniority |
          klarner_id + election_year,
        data = .)

# Threshold 4: At least 50% co-partisan representation (majority)
model_thresh_50 <- model_data %>%
  mutate(viable = case_when(
    party == "Rep" & prop_rep >= 0.5 ~ 1,
    party == "Dem" & prop_dem >= 0.5 ~ 1,
    TRUE ~ 0)) %>%
  feols(avg_opp_prop ~ viable + party_pres_vs + seniority |
          klarner_id + election_year,
        data = .)

# Prepare data for plotting
graphing_data_figJ1 <- data.frame(
  threshold = c("Any", "25%", "33%", "50%"),
  estimate = c(coef(model_thresh_any)[1], coef(model_thresh_25)[1],
               coef(model_thresh_33)[1], coef(model_thresh_50)[1]),
  lower_90 = c(confint(model_thresh_any, level = 0.90, parm = "viable")[[1]],
               confint(model_thresh_25, level = 0.90, parm = "viable")[[1]],
               confint(model_thresh_33, level = 0.90, parm = "viable")[[1]],
               confint(model_thresh_50, level = 0.90, parm = "viable")[[1]]),
  lower_95 = c(confint(model_thresh_any, level = 0.95, parm = "viable")[[1]],
               confint(model_thresh_25, level = 0.95, parm = "viable")[[1]],
               confint(model_thresh_33, level = 0.95, parm = "viable")[[1]],
               confint(model_thresh_50, level = 0.95, parm = "viable")[[1]]),
  upper_90 = c(confint(model_thresh_any, level = 0.90, parm = "viable")[[2]],
               confint(model_thresh_25, level = 0.90, parm = "viable")[[2]],
               confint(model_thresh_33, level = 0.90, parm = "viable")[[2]],
               confint(model_thresh_50, level = 0.90, parm = "viable")[[2]]),
  upper_95 = c(confint(model_thresh_any, level = 0.95, parm = "viable")[[1]],
               confint(model_thresh_25, level = 0.95, parm = "viable")[[2]],
               confint(model_thresh_33, level = 0.95, parm = "viable")[[2]],
               confint(model_thresh_50, level = 0.95, parm = "viable")[[2]])) %>%
  mutate(threshold = factor(threshold,
                            levels = rev(c("Any", "25%", "33%", "50%"))))

# Generate Figure J1: Robustness across thresholds
ggplot(graphing_data_figJ1, aes(x = estimate, y = threshold)) +
  geom_vline(aes(xintercept = 0), lty = "dashed") +
  geom_vline(aes(xintercept = -0.021), lty = "dashed", color = "gray") +
  geom_errorbarh(aes(xmin = lower_90, xmax = upper_90), linewidth = 1, height = 0) +
  geom_errorbarh(aes(xmin = lower_95, xmax = upper_95), linewidth = 1/2, height = 0) +
  geom_point(size = 3, shape = 21, color = "black", fill = "white") +
  scale_x_continuous(limits = c(-0.06, 0.06)) +
  labs(x = "Estimated Effect of Viability \n on Bipartisan Collaboration",
       y = "Amount of Overlap Required for Viability") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )


### Creating Table I1 ----------------------------------------------------------
# Test whether viability increases probability of running for Congress
# (validation check for the mechanism)

# Estimate reduced form model
model_congress <- feols(cong_ran ~ viable + party_pres_vs + seniority |
                          election_year + klarner_id,
                        vcov = "cluster",
                        data = leg_df)

# Report baseline probability
cat("Baseline probability of running for Congress:", mean(leg_df$cong_ran), "\n")

# Generate Table I1
modelsummary(list(model_congress),
             coef_map = c("viable" = "Viable for Congress",
                          "party_pres_vs" = "Party Presidential Vote Share",
                          "seniority" = "Years Served in Legislative State Chamber"),
             gof_map = c("nobs"),
             fmt = 3,
             add_rows = data.frame(
               name = c("Model", "Controls", "Legislator FEs", "Election Year FEs"),
               model_1 = c("Reduced Form (OLS)", "Y", "Y", "Y")),
             notes = "Standard errors clustered by legislator shown in parentheses underneath coefficient estimates.",
             stars = c("*" = 0.10,
                       "**" = 0.05,
                       "***" = 0.01))
